{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b2b233fb",
   "metadata": {},
   "source": [
    "# [6、PyTorch中搭建分类网络实例](https://www.bilibili.com/video/BV1VF411a7oz?spm_id_from=333.788.player.switch&vd_source=cdd897fffb54b70b076681c3c4e4d45d)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "173b6435",
   "metadata": {},
   "source": [
    "神经网络一般由层或者模块，模型一般定义在`torch.nn`中"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "221fc0dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "from torchvision import datasets\n",
    "from torchvision.transforms import ToTensor\n",
    "from torch.utils.data import Dataset, DataLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "709045db",
   "metadata": {},
   "outputs": [],
   "source": [
    "device = 'cuda' if torch.cuda.is_available() else 'cpu'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "04ab7a2a",
   "metadata": {},
   "source": [
    "## 基于手写数字识别构建分类模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "50022aae",
   "metadata": {},
   "outputs": [],
   "source": [
    "training_data = datasets.FashionMNIST(\n",
    "    root=\"mnist_dataset\", train=True, download=True, transform=ToTensor()\n",
    ")\n",
    "\n",
    "test_data = datasets.FashionMNIST(\n",
    "    root=\"mnist_dataset\", train=False, download=True, transform=ToTensor()\n",
    ")\n",
    "\n",
    "train_iter = DataLoader(training_data, batch_size=64, shuffle=True, pin_memory=True)\n",
    "test_iter = DataLoader(test_data, batch_size=64, shuffle=False, pin_memory=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "fca6b469",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 定义 class\n",
    "from torch import Tensor\n",
    "\n",
    "class NeuralNetwork(nn.Module):\n",
    "    \"\"\"\"\"\"\n",
    "    def __init__(self) -> None:\n",
    "        super(NeuralNetwork, self).__init__()\n",
    "        self.flatten = nn.Flatten() # 变成一维张量\n",
    "        self.linear_relu_stack = nn.Sequential(\n",
    "            # 顺序连起来\n",
    "            nn.Linear(28*28, 512), # 输入特征维度，输入特征大小\n",
    "            nn.ReLU(inplace=True), # 加速\n",
    "            nn.Linear(512, 512),\n",
    "            nn.ReLU(inplace=True),\n",
    "            nn.Linear(512, 10)\n",
    "        )\n",
    "    def forward(self, x:Tensor):\n",
    "        x = self.flatten(x)\n",
    "        logits = self.linear_relu_stack(x)\n",
    "        return logits\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ad9ecf4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NeuralNetwork(\n",
      "  (flatten): Flatten(start_dim=1, end_dim=-1)\n",
      "  (linear_relu_stack): Sequential(\n",
      "    (0): Linear(in_features=784, out_features=512, bias=True)\n",
      "    (1): ReLU(inplace=True)\n",
      "    (2): Linear(in_features=512, out_features=512, bias=True)\n",
      "    (3): ReLU(inplace=True)\n",
      "    (4): Linear(in_features=512, out_features=10, bias=True)\n",
      "  )\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "# 实例化模块\n",
    "model = NeuralNetwork().to(device)\n",
    "print(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9e67072a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "=================================================================\n",
      "Layer (type:depth-idx)                   Param #\n",
      "=================================================================\n",
      "├─Flatten: 1-1                           --\n",
      "├─Sequential: 1-2                        --\n",
      "|    └─Linear: 2-1                       401,920\n",
      "|    └─ReLU: 2-2                         --\n",
      "|    └─Linear: 2-3                       262,656\n",
      "|    └─ReLU: 2-4                         --\n",
      "|    └─Linear: 2-5                       5,130\n",
      "=================================================================\n",
      "Total params: 669,706\n",
      "Trainable params: 669,706\n",
      "Non-trainable params: 0\n",
      "=================================================================\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "=================================================================\n",
       "Layer (type:depth-idx)                   Param #\n",
       "=================================================================\n",
       "├─Flatten: 1-1                           --\n",
       "├─Sequential: 1-2                        --\n",
       "|    └─Linear: 2-1                       401,920\n",
       "|    └─ReLU: 2-2                         --\n",
       "|    └─Linear: 2-3                       262,656\n",
       "|    └─ReLU: 2-4                         --\n",
       "|    └─Linear: 2-5                       5,130\n",
       "=================================================================\n",
       "Total params: 669,706\n",
       "Trainable params: 669,706\n",
       "Non-trainable params: 0\n",
       "================================================================="
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from torchsummary import summary\n",
    "# https://pypi.org/project/torch-summary/\n",
    "summary(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "f6d437c8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(8, device='cuda:0')"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = torch.rand(1, 28, 28, device=device)\n",
    "logits:Tensor = model(X)\n",
    "logits.argmax()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "26550204",
   "metadata": {},
   "source": [
    "## nn.Flatten\n",
    "\n",
    "将张量展平为两个维度，只保留 start_dim、end_dim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "14d6dc3b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([5, 28, 28])\n",
      "torch.Size([5, 784])\n"
     ]
    }
   ],
   "source": [
    "x = torch.rand((5,28, 28))\n",
    "print(x.shape)\n",
    "x = nn.Flatten()(x)\n",
    "print(x.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ef7e9ded",
   "metadata": {},
   "source": [
    "## nn.Linear\n",
    "\n",
    "线性层，就是计算矩阵乘法\n",
    "- in_features：必须，特征维度\n",
    "- out_features：必须，输出样本特征维度\n",
    "- bias：偏置，也就是 w@x+b中的b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "a54582cb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([5, 128])\n",
      "torch.Size([128, 784])\n",
      "torch.Size([128])\n"
     ]
    }
   ],
   "source": [
    "layer = nn.Linear(in_features=28*28, out_features=128)\n",
    "hidden1 = layer(x)\n",
    "print(hidden1.shape, layer.weight.shape, layer.bias.shape, sep='\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8bd01cc2",
   "metadata": {},
   "source": [
    "## nn.Relu\n",
    "\n",
    "非线性激活单元，非线性能够增加神经网络建模能力"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2da7ec99",
   "metadata": {},
   "source": [
    "## nn.Sequential\n",
    "\n",
    "一个**容器**，将一些Module作为参数，数据就会有序地经过模块算出结果"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b36e5f34",
   "metadata": {},
   "source": [
    "## nn.Softmax\n",
    "\n",
    "将数字转换为概率，概率的总和为1，公式为：\n",
    "\n",
    "$$\n",
    "x = \\frac{e^x}{\\sum_y{e^y}}\n",
    "$$\n",
    "\n",
 简单推导">
    "> 简单推导：目标是输出概率，且总和为1。**概率非负**，因此使用**指数**；总和为1，因此做归一化。\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "18b93ca2",
   "metadata": {},
   "source": [
    "\n",
    "### Softmax 详细推导\n",
    "\n",
    "逻辑回归中，我们使用 Sigmoid 将 z 映射为概率：\n",
    "$$\n",
    "P(y=1|z) = \\frac{1}{1+e^{-z}} = \\frac{e^z}{e^z+1}=   \\frac{e^z}{e^z+e^0}\n",
    "$$\n",
    "\n",
    "\n",
    "#### 1. 回顾逻辑回归 (二分类)\n",
    "\n",
    "仔细观察 $P(y=1 | z)$ 的形式，它其实是两个值的归一化：$e^z$ 和 $e^0=1$。我们可以把二分类任务看作是两个类别，一个分数为 $z$，另一个分数为 $0$。那么 Sigmoid 就是 Softmax 在 $K=2$ 时的特例。\n",
    "\n",
    "$$\n",
    "\\text{Softmax}(z_1, z_2=0)_1 = \\frac{e^{z_1}}{e^{z_1} + e^{z_2}} = \\frac{e^{z_1}}{e^{z_1} + e^{0}} = \\frac{e^{z_1}}{e^{z_1} + 1} = \\frac{1}{1 + e^{-z_1}} = \\text{Sigmoid}(z_1)\n",
    "$$\n",
    "现在考虑 $K$ 个互斥类别，这是一个**多项分布 (Multinomial Distribution)**。假设对于一个输入样本 $\\mathbf{x}$，其真实类别为第 $i$ 类。模型预测的各个类别的概率为 $p_1, p_2, \\dots, p_K$，其中 $p_j = P(y=j | \\mathbf{x})$。\n",
    "\n",
    "我们的目标是找到一组模型参数 $\\theta$，使得对于给定的训练数据集，观测到真实标签的**似然 (Likelihood)** 最大。对于单个样本 $\\mathbf{x}$，其真实标签是 $i$，那么似然函数就是 $L(\\theta) = p_i$。\n",
    "\n",
    "为了方便计算，我们通常最大化**对数似然 (Log-Likelihood)**：\n",
    "$\\log L(\\theta) = \\log p_i$\n",
    "\n",
    "我们希望将模型的输出分数 $z_1, z_2, \\dots, z_K$ 与概率 $p_1, p_2, \\dots, p_K$ 联系起来。一个简单而有效的建模方式是假设对数概率 $\\log p_j$ 与分数 $z_j$ 成正比：\n",
    "\n",
    "$$\n",
    "\\log p_j \\propto z_j \\implies \\log p_j = z_j - C\n",
    "$$\n",
    "\n",
    "这里的 $C$ 是一个常数，因为它不依赖于类别 $j$，所以对于所有类别都是一样的。这个常数 $C$ 的作用是确保所有概率之和为 1，即起到归一化的作用。\n",
    "\n",
    "从上式可得：\n",
    "$$\n",
    "p_j = e^{z_j - C} = e^{z_j} \\cdot e^{-C}\n",
    "$$\n",
    "\n",
    "因为所有类别的概率之和必须为 1：\n",
    "$$\n",
    "\\sum_{j=1}^{K} p_j = \\sum_{j=1}^{K} e^{z_j} \\cdot e^{-C} = 1\n",
    "$$\n",
    "\n",
    "$$\n",
    "e^{-C} \\left( \\sum_{j=1}^{K} e^{z_j} \\right) = 1 \\implies e^{-C} = \\frac{1}{\\sum_{j=1}^{K} e^{z_j}}\n",
    "$$\n",
    "\n",
    "将 $e^{-C}$ 代回到 $p_j = e^{z_j} \\cdot e^{-C}$ 的表达式中，我们得到：\n",
    "\n",
    "$$\n",
    "p_j = \\frac{e^{z_j}}{\\sum_{k=1}^{K} e^{z_k}}\n",
    "$$\n",
    "\n",
    "这正是 Softmax 公式。这个推导说明，Softmax 函数是源于用线性分数来建模对数概率，并通过最大似然原理自然导出的结果。"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "956f57b9",
   "metadata": {},
   "source": [
    "## Model Parameters\n",
    "利用模型的 parameters() 或 named_parameters() 方法访问所有参数。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "1ecb018b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model structure: NeuralNetwork(\n",
      "  (flatten): Flatten(start_dim=1, end_dim=-1)\n",
      "  (linear_relu_stack): Sequential(\n",
      "    (0): Linear(in_features=784, out_features=512, bias=True)\n",
      "    (1): ReLU(inplace=True)\n",
      "    (2): Linear(in_features=512, out_features=512, bias=True)\n",
      "    (3): ReLU(inplace=True)\n",
      "    (4): Linear(in_features=512, out_features=10, bias=True)\n",
      "  )\n",
      ")\n",
      "\n",
      "\n",
      "Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0087, -0.0049,  0.0345,  ..., -0.0021,  0.0271,  0.0087],\n",
      "        [-0.0067, -0.0071,  0.0065,  ...,  0.0173, -0.0013,  0.0086]],\n",
      "       device='cuda:0', grad_fn=<SliceBackward0>) \n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(f\"Model structure: {model}\\n\\n\")\n",
    "\n",
    "for name, param in model.named_parameters():\n",
    "    print(f\"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \\n\")\n",
    "    break"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dl",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
